# =============================================================================
# DYNAMIC BAYESIAN LATENT VARIABLE MODEL
# Bayesian estimation of legislative power-sharing using V-Dem indicators
# Adapted from Reuning, Kenwick, and Farris (2019); code pulled from Political Analysis Dataverse.
# =============================================================================


# NOTE: This model requires high-performance computing resources
# It is too computationally intensive for local execution

# SET WORKING DIRECTORY

# --- PACKAGES ---------------------------------------------------------------
# install.packages("rstan", repos = "http://cran.us.r-project.org")
library(rstan)
# install.packages("loo", repos = "http://cran.us.r-project.org")
library(loo)
# install.packages("coda", repos = "http://cran.us.r-project.org")
library(coda)

# ---HELPER FUNCTIONS ---------------------------------------------------------------
# Inverse logit function 
inv.logit <- function(x) 1 / (1 + exp(-x))

# N SAMPLES
n.samples <- 1000

# --- DATA PREP ---------------------------------------------------------------
df <- read.csv("data_lvm.csv")

prepare.data <- function(data, include.extra = FALSE) {
  if (include.extra) {
    final <- data.frame(cbind(
      unclass(factor(data$v2exdfdshs_ord)),
      unclass(factor(data$v2exdfvths_ord)),
      unclass(factor(data$v2exdfpphs_ord)),
      unclass(factor(data$v2lginvstp_ord)),
      unclass(factor(data$v2lgoppart_ord)), 
      unclass(factor(data$v2lgfunds_ord)),
      unclass(factor(data$v2lgcomslo_ord)), 
      unclass(factor(data$v2lgintbup)),
      unclass(factor(data$v2lgintblo)),
      unclass(factor(data$v2lgqstexp))
    ))
    names(final) <- c(
      'v2exdfdshs_ord','v2exdfvths_ord','v2exdfpphs_ord','v2lginvstp_ord',
      'v2lgoppart_ord','v2lgfunds_ord','v2lgcomslo_ord','v2lgintbup',
      'v2lgintblo','v2lgqstexp'
    )
  } else {
    final <- data.frame(cbind(
      unclass(factor(data$v2exdfdshs_ord)),
      unclass(factor(data$v2exdfvths_ord)),
      unclass(factor(data$v2exdfpphs_ord)),
      unclass(factor(data$v2lginvstp_ord)),
      unclass(factor(data$v2lgoppart_ord)), 
      unclass(factor(data$v2lgfunds_ord)),
      unclass(factor(data$v2lgcomslo_ord)), 
      unclass(factor(data$v2lgintbup)),
      unclass(factor(data$v2lgintblo)),
      unclass(factor(data$v2lgqstexp))
    ))
    names(final) <- c(
      'v2exdfdshs_ord','v2exdfvths_ord','v2exdfpphs_ord','v2lginvstp_ord',
      'v2lgoppart_ord','v2lgfunds_ord','v2lgcomslo_ord','v2lgintbup',
      'v2lgintblo','v2lgqstexp'
    )
  }
  rownames(final) <- paste(1:length(data$country), data$country, data$year)
  final
}

df <- prepare.data(df)

df$count.year <- rownames(df)
for (ii in 1:nrow(df)) {
  tmp <- strsplit(df$count.year[ii], " ")[[1]]
  tmp <- tmp[-1]
  year <- as.numeric(tmp[length(tmp)])
  df$year[ii] <- as.numeric(tmp[length(tmp)])
  tmp <- tmp[-length(tmp)]
  count <- paste(tmp, collapse = " ")
  df$count[ii] <- paste(tmp, collapse = " ")
  df$year.count[ii] <- paste(year, count)
}

df$count.year.id <- as.numeric(as.factor(df$year.count))
df <- df[order(df$year.count), ]

# --- LEGISLATIVE INDICATORS ---------------------------------------------------------------
# v2exdfdshs_ord
ind_4_v2exdfdshs <- df[, 1]
year_count_ind_4_v2exdfdshs <- df$count.year.id
year_count_ind_4_v2exdfdshs <- year_count_ind_4_v2exdfdshs[!is.na(ind_4_v2exdfdshs)]
ind_4_v2exdfdshs <- ind_4_v2exdfdshs[!is.na(ind_4_v2exdfdshs)]
n_4_v2exdfdshs <- length(ind_4_v2exdfdshs)

# v2exdfvths_ord
ind_5_v2exdfvths <- df[, 2]
year_count_ind_5_v2exdfvths <- df$count.year.id
year_count_ind_5_v2exdfvths <- year_count_ind_5_v2exdfvths[!is.na(ind_5_v2exdfvths)]
ind_5_v2exdfvths <- ind_5_v2exdfvths[!is.na(ind_5_v2exdfvths)]
n_5_v2exdfvths <- length(ind_5_v2exdfvths)

# v2exdfpphs_ord
ind_3_v2exdfpphs <- df[, 3]
year_count_ind_3_v2exdfpphs <- df$count.year.id
year_count_ind_3_v2exdfpphs <- year_count_ind_3_v2exdfpphs[!is.na(ind_3_v2exdfpphs)]
ind_3_v2exdfpphs <- ind_3_v2exdfpphs[!is.na(ind_3_v2exdfpphs)]
n_3_v2exdfpphs <- length(ind_3_v2exdfpphs)

# v2lginvstp_ord
ind_5_v2lginvstp <- df[, 4]
year_count_ind_5_v2lginvstp <- df$count.year.id
year_count_ind_5_v2lginvstp <- year_count_ind_5_v2lginvstp[!is.na(ind_5_v2lginvstp)]
ind_5_v2lginvstp <- ind_5_v2lginvstp[!is.na(ind_5_v2lginvstp)]
n_5_v2lginvstp <- length(ind_5_v2lginvstp)

# v2lgoppart_ord
ind_3_v2lgoppart <- df[, 5]
year_count_ind_3_v2lgoppart <- df$count.year.id
year_count_ind_3_v2lgoppart <- year_count_ind_3_v2lgoppart[!is.na(ind_3_v2lgoppart)]
ind_3_v2lgoppart <- ind_3_v2lgoppart[!is.na(ind_3_v2lgoppart)]
n_3_v2lgoppart <- length(ind_3_v2lgoppart)

# v2lgfunds_ord
ind_2_v2lgfunds <- df[, 6]
year_count_ind_2_v2lgfunds <- df$count.year.id
year_count_ind_2_v2lgfunds <- year_count_ind_2_v2lgfunds[!is.na(ind_2_v2lgfunds)]
ind_2_v2lgfunds <- ind_2_v2lgfunds[!is.na(ind_2_v2lgfunds)]
n_2_v2lgfunds <- length(ind_2_v2lgfunds)

# v2lgcomslo_ord
ind_4_v2lgcomslo <- df[, 7]
year_count_ind_4_v2lgcomslo <- df$count.year.id
year_count_ind_4_v2lgcomslo <- year_count_ind_4_v2lgcomslo[!is.na(ind_4_v2lgcomslo)]
ind_4_v2lgcomslo <- ind_4_v2lgcomslo[!is.na(ind_4_v2lgcomslo)]
n_4_v2lgcomslo <- length(ind_4_v2lgcomslo)

# v2lgintbup
ind_2_v2lgintbup <- df[, 8]
year_count_ind_2_v2lgintbup <- df$count.year.id
year_count_ind_2_v2lgintbup <- year_count_ind_2_v2lgintbup[!is.na(ind_2_v2lgintbup)]
ind_2_v2lgintbup <- ind_2_v2lgintbup[!is.na(ind_2_v2lgintbup)]
n_2_v2lgintbup <- length(ind_2_v2lgintbup)

# v2lgintblo
ind_2_v2lgintblo <- df[, 9]
year_count_ind_2_v2lgintblo <- df$count.year.id
year_count_ind_2_v2lgintblo <- year_count_ind_2_v2lgintblo[!is.na(ind_2_v2lgintblo)]
ind_2_v2lgintblo <- ind_2_v2lgintblo[!is.na(ind_2_v2lgintblo)]
n_2_v2lgintblo <- length(ind_2_v2lgintblo)

# v2lgqstexp
ind_2_v2lgqstexp <- df[, 10]
year_count_ind_2_v2lgqstexp <- df$count.year.id
year_count_ind_2_v2lgqstexp <- year_count_ind_2_v2lgqstexp[!is.na(ind_2_v2lgqstexp)]
ind_2_v2lgqstexp <- ind_2_v2lgqstexp[!is.na(ind_2_v2lgqstexp)]
n_2_v2lgqstexp <- length(ind_2_v2lgqstexp)

# --- ORDER DF ---------------------------------------------------------------
df <- df[order(df$count.year.id), ]

df$prev.year <- NA
for (ii in 1:nrow(df)) {
  tmp.c <- df$count[ii]
  tmp.y <- df$year[ii]
  tmp.df <- df[df$count == tmp.c & df$year < tmp.y, ]
  if (nrow(tmp.df) == 0) {
    df$prev.year[ii] <- 0
  } else {
    tmp.df <- tmp.df[order(tmp.df$year), ]
    df$prev.year[ii] <- tmp.df$count.year.id[nrow(tmp.df)]
  }
}

# --- DATA LIST FOR STAN ---------------------------------------------------------------
stan.data <- list(
  years_count_ind_4_v2exdfdshs = year_count_ind_4_v2exdfdshs,
  items_4_v2exdfdshs = ind_4_v2exdfdshs,
  n_4_v2exdfdshs = n_4_v2exdfdshs,
  years_count_ind_5_v2exdfvths = year_count_ind_5_v2exdfvths,
  items_5_v2exdfvths = ind_5_v2exdfvths,
  n_5_v2exdfvths = n_5_v2exdfvths,
  years_count_ind_3_v2exdfpphs = year_count_ind_3_v2exdfpphs,
  items_3_v2exdfpphs = ind_3_v2exdfpphs,
  n_3_v2exdfpphs = n_3_v2exdfpphs,
  years_count_ind_5_v2lginvstp = year_count_ind_5_v2lginvstp,
  items_5_v2lginvstp = ind_5_v2lginvstp,
  n_5_v2lginvstp = n_5_v2lginvstp,
  years_count_ind_3_v2lgoppart = year_count_ind_3_v2lgoppart,
  items_3_v2lgoppart = ind_3_v2lgoppart,
  n_3_v2lgoppart = n_3_v2lgoppart,
  years_count_ind_2_v2lgfunds = year_count_ind_2_v2lgfunds,
  items_2_v2lgfunds = ind_2_v2lgfunds,
  n_2_v2lgfunds = n_2_v2lgfunds,
  years_count_ind_4_v2lgcomslo = year_count_ind_4_v2lgcomslo,
  items_4_v2lgcomslo = ind_4_v2lgcomslo,
  n_4_v2lgcomslo = n_4_v2lgcomslo,
  years_count_ind_2_v2lgintbup = year_count_ind_2_v2lgintbup,
  items_2_v2lgintbup = ind_2_v2lgintbup,
  n_2_v2lgintbup = n_2_v2lgintbup,
  years_count_ind_2_v2lgintblo = year_count_ind_2_v2lgintblo,
  items_2_v2lgintblo = ind_2_v2lgintblo,
  n_2_v2lgintblo = n_2_v2lgintblo,
  years_count_ind_2_v2lgqstexp = year_count_ind_2_v2lgqstexp,
  items_2_v2lgqstexp = ind_2_v2lgqstexp,
  n_2_v2lgqstexp = n_2_v2lgqstexp,
  n_years_count = max(df$count.year.id),
  thetas_past = df$prev.year,
  nu = 4
)

# --- FITTING THE MODEL ---------------------------------------------------------------
set.seed(1)

mod.dyn <- stan(
  file = "Dynamic.stan",
  data = stan.data, seed = 570175513, thin = 5,
  iter = 10000
)

# --- GATHER ESTIMATES ---------------------------------------------------------------
out <- extract(mod.dyn)

df <- df[order(df$count.year.id), ]
df$dyn.estimates <- apply(out$thetas, 2, median)
df$dyn.up        <- apply(out$thetas, 2, quantile, 0.975)
df$dyn.lo        <- apply(out$thetas, 2, quantile, 0.025)

write.csv(df, "estimates_independent.csv")

# --- SAVE STAN FIT ---------------------------------------------------------------
saveRDS(mod.dyn, "dynamic_fit.rds")

# load
mod.dyn <- readRDS("dynamic_fit.rds")

# --- CONVERGENCE DIAGNOSTICS---------------------------------------------------------------
params <- c(
  "innov", "c_n_4_v2exdfdshs", "c_n_5_v2exdfvths", "c_n_3_v2exdfpphs", "c_n_5_v2lginvstp", 
  "c_n_3_v2lgoppart", "c_n_2_v2lgfunds", "c_n_4_v2lgcomslo", "c_n_2_v2lgintbup", "c_n_2_v2lgintblo",
  "c_n_2_v2lgqstexp", paste("beta[", 1:10, "]", sep = "")
)

params.all <- names(mod.dyn)
params.use <- c(
  grep("thetas", params.all, value = TRUE),
  grep(paste(params, collapse = "|"), params.all, value = TRUE),
  params[params %in% params.all]
)

pdf("Dynamic_Convergence.pdf", height = 6, width = 6)
stan_rhat(mod.dyn, pars = params.use, fill = "springgreen")
dev.off()

# --- CLEAN ---------------------------------------------------------------
rm(mod.dyn)
sapply(1:5, gc)
